{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Populating the interactive namespace from numpy and matplotlib\n"
     ]
    }
   ],
   "source": [
    "%pylab inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from util.dataset import read_data_set\n",
    "from util.image import half"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Prepare the data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from scipy.ndimage import convolve\n",
    "def nudge_dataset(X, Y):\n",
    "    direction_vectors = [\n",
    "        [[0, 1, 0],\n",
    "         [0, 0, 0],\n",
    "         [0, 0, 0]],\n",
    "\n",
    "        [[0, 0, 0],\n",
    "         [1, 0, 0],\n",
    "         [0, 0, 0]],\n",
    "\n",
    "        [[0, 0, 0],\n",
    "         [0, 0, 1],\n",
    "         [0, 0, 0]],\n",
    "\n",
    "        [[0, 0, 0],\n",
    "         [0, 0, 0],\n",
    "         [0, 1, 0]]]\n",
    "    \n",
    "    matrix_size = (int(sqrt(X.shape[1])),) * 2\n",
    "\n",
    "    shift = lambda x, w: convolve(x.reshape(matrix_size), mode='constant',\n",
    "                                  weights=w).ravel()\n",
    "    X = np.concatenate([X] +\n",
    "                       [np.apply_along_axis(shift, 1, X, vector)\n",
    "                        for vector in direction_vectors])\n",
    "    Y = np.concatenate([Y for _ in range(5)], axis=0)\n",
    "    return X, Y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "X_train, y_train = (asarray(d) for d in read_data_set('train', lambda d: half(half(d))))\n",
    "# X_train, y_train = nudge_dataset(X_train, y_train)\n",
    "X_train = (X_train - np.min(X_train, 0)) / (np.max(X_train, 0) + 0.0001)  # 0-1 scaling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "X_test, y_test = (asarray(d) for d in read_data_set('t10k', lambda d: half(half(d))))\n",
    "# X_test, y_test = nudge_dataset(X_test, y_test)\n",
    "X_test = (X_test - np.min(X_test, 0)) / (np.max(X_test, 0) + 0.0001)  # 0-1 scaling"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Configuring Logistic Regression."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "lr = LogisticRegression()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Perform a Grid Search."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn.grid_search import GridSearchCV"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 3 folds for each of 10 candidates, totalling 30 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=2)]: Done   1 jobs       | elapsed:    5.5s\n",
      "[Parallel(n_jobs=2)]: Done  28 out of  30 | elapsed:  2.5min remaining:   10.5s\n",
      "[Parallel(n_jobs=2)]: Done  30 out of  30 | elapsed:  2.8min finished\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=None, error_score='raise',\n",
       "       estimator=LogisticRegression(C=5000, class_weight=None, dual=False, fit_intercept=True,\n",
       "          intercept_scaling=1, max_iter=100, multi_class='ovr',\n",
       "          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n",
       "          verbose=0),\n",
       "       fit_params={}, iid=True, loss_func=None, n_jobs=2,\n",
       "       param_grid={'C': (1, 10, 100, 1000, 5000), 'penalty': ('l1', 'l2')},\n",
       "       pre_dispatch='2*n_jobs', refit=True, score_func=None, scoring=None,\n",
       "       verbose=1)"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "grid_search = GridSearchCV(lr, dict(\n",
    "        C=(1, 10, 100, 1000, 5000),\n",
    "        penalty=('l1', 'l2')\n",
    "    ), n_jobs = 2, verbose=1)\n",
    "grid_search.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LogisticRegression(C=5000, class_weight=None, dual=False, fit_intercept=True,\n",
       "          intercept_scaling=1, max_iter=100, multi_class='ovr',\n",
       "          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n",
       "          verbose=0)"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lr.set_params(**grid_search.best_params_)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Training the classifier."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LogisticRegression(C=5000, class_weight=None, dual=False, fit_intercept=True,\n",
       "          intercept_scaling=1, max_iter=100, multi_class='ovr',\n",
       "          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n",
       "          verbose=0)"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "lr.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Evaluation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from sklearn import metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "LogisticRegression(C=5000, class_weight=None, dual=False, fit_intercept=True,\n",
      "          intercept_scaling=1, max_iter=100, multi_class='ovr',\n",
      "          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,\n",
      "          verbose=0)\n",
      "             precision    recall  f1-score   support\n",
      "\n",
      "          0       0.94      0.97      0.95       980\n",
      "          1       0.96      0.98      0.97      1135\n",
      "          2       0.91      0.86      0.89      1032\n",
      "          3       0.86      0.90      0.88      1010\n",
      "          4       0.90      0.88      0.89       982\n",
      "          5       0.85      0.78      0.81       892\n",
      "          6       0.93      0.94      0.93       958\n",
      "          7       0.91      0.91      0.91      1028\n",
      "          8       0.82      0.80      0.81       974\n",
      "          9       0.85      0.88      0.86      1009\n",
      "\n",
      "avg / total       0.89      0.89      0.89     10000\n",
      "\n"
     ]
    }
   ],
   "source": [
    "print('%s\\n%s' % (lr, metrics.classification_report(y_test, lr.predict(X_test))))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
